#Dennis moskov, master Thesis
# Find the best number of variables tried at each split (mtry) with Random Forest
#using "randomForest" package

#install.packages("randomForest")
#library(randomForest)

# Randomly shuffle the rows of the data.
set.seed(42)                      # seed for reproducibility
DBf <- useDB[sample(nrow(useDB)), ]

# The last column is passed to tuneRF as the response, all other columns as
# predictors. drop = FALSE keeps the predictors two-dimensional even when
# only one predictor column remains.
resp_col <- ncol(DBf)

# Initial search for the best number of variables tried per split (mtry).
# tuneRF returns a matrix whose first column is mtry and whose second column
# is the corresponding OOB error.
nv <- tuneRF(DBf[, -resp_col, drop = FALSE], DBf[, resp_col],
             stepFactor = 1.5, improve = 0.005, ntreeTry = 500,
             mtryStart = 2)

# Re-run the search, each time starting from the previous best mtry, until
# two consecutive runs agree on the best mtry. The forests are stochastic,
# so the result may oscillate; cap the number of re-runs to guarantee
# termination.
max_tune_iter <- 20L
tune_iter <- 0L

bestnv <- 2
bestnv_new <- bestnv + 1          # differs from bestnv to force a first pass
while (bestnv != bestnv_new) {
  # Row with the minimal OOB error. which.min() compares numerically and
  # always yields a single row (the first one on ties), unlike matching the
  # number as a regular expression.
  best_row <- which.min(nv[, 2])

  # Rows directly below/above the minimum, restricted to rows that exist
  # (the minimum may sit in the first or the last row).
  rangerow <- c(best_row - 1, best_row + 1)
  rangerow <- rangerow[rangerow >= 1 & rangerow <= nrow(nv)]

  bestnv <- unname(nv[best_row, 1])   # mtry of min OOB error
  rangenv <- nv[rangerow, 1]          # mtry values adjacent to the best one

  nv <- tuneRF(DBf[, -resp_col, drop = FALSE], DBf[, resp_col],
               stepFactor = 1.5, improve = 0.005, ntreeTry = 500,
               mtryStart = bestnv)
  bestnv_new <- unname(nv[which.min(nv[, 2]), 1])

  tune_iter <- tune_iter + 1L
  if (bestnv != bestnv_new && tune_iter >= max_tune_iter) {
    warning("mtry search did not stabilise after ", max_tune_iter,
            " runs; last candidates: ", bestnv, " and ", bestnv_new,
            call. = FALSE)
    break
  }
}
# rangenv stems from the run that proposed bestnv (the run before the
# confirming one); strip the row names carried over from the tuneRF matrix.
names(rangenv) <- NULL



















